# Load the pediatric COVID records and derive the analysis columns:
#   * city            lower-cased, with "n white plains" folded into
#                     "white plains"
#   * county          looked up from city (cities not listed become NA)
#   * eventdatetime   collapsed from a m/d/Y date to a zoo year-month
#   * ethnicity_race  single label built from race, then ethnicity
covid_df =
  read_csv("./data/p8105_final_ped_covid.csv") %>%
  janitor::clean_names() %>%
  mutate(
    city = str_replace(tolower(city), "n white plains", "white plains"),
    # NOTE(review): "greenwich" maps to "washington", which is not one of the
    # counties requested for the map further down -- confirm intended county.
    county = case_when(
      city == "bronx" ~ "bronx",
      city == "brooklyn" ~ "kings",
      city == "new york" ~ "new york",
      city == "staten island" ~ "richmond",
      city == "greenwich" ~ "washington",
      city == "bergenfield" ~ "bergen",
      city == "berkeley heights" ~ "union",
      city %in% c("ridgewood", "flushing") ~ "queens",
      city %in% c("monroe", "newburgh") ~ "orange",
      city %in% c("nanuet", "spring valley", "new city",
                  "haverstraw", "suffern") ~ "rockland",
      city %in% c("yonkers", "mount vernon", "new rochelle", "white plains",
                  "ossining", "port chester", "irvington", "chappaqua",
                  "ferncliff manor") ~ "westchester"
    ),
    # Parse the date, drop the day, and store the result as a year-month.
    eventdatetime = eventdatetime %>%
      as.Date("%m/%d/%Y") %>%
      format("%m-%Y") %>%
      zoo::as.yearmon("%m-%Y"),
    # Race is tested first; the Hispanic/Latino ethnicity only labels rows
    # whose race matched none of the earlier conditions. Anything else is NA.
    ethnicity_race = case_when(
      race == "R3 Black or African-American" ~ "black",
      race == "R2 Asian" ~ "asian",
      race == "R5 White" ~ "caucasian",
      race == "R1 American Indian or Alaska Native" ~ "american indian",
      race == "Multiple Selected" ~ "multiple",
      ethnicity == "E1 Spanish/Hispanic/Latino" ~ "latino"
    )
  ) %>%
  # Keep city and county as the two leading columns.
  select(city, county, everything())
# Map visualization of county-level average BMI and SES, using tidycensus and tmap
# Lower-case county names to request from the ACS, one vector per state.
county_ny = c(
  "bronx", "kings", "westchester", "new york",
  "queens", "rockland", "orange", "richmond"
)
county_nj = c(
  "bergen", "union"
)
# Census-tract geometries for the New York counties listed in county_ny.
# The ACS estimate itself is discarded by the select(); only the tract name
# and geometry are kept, and the name is parsed into `county` (lower case)
# and `state`.
#
# Tract names look like "Census Tract 1, Bronx County, New York" (newer ACS
# releases use ";" as the separator). The county and state are pulled out by
# pattern rather than by fixed character offsets, so the parsing does not
# depend on the length of the state name and multi-word counties such as
# "New York" need no special-casing.
shape_ny =
  get_acs(
    geography = "tract",
    variables = "B19013_001",
    state = "NY",
    county = county_ny,
    geometry = TRUE
  ) %>%
  janitor::clean_names() %>%
  select(name, geometry) %>%
  mutate(
    # Text immediately before " County", e.g. "Bronx" or "New York".
    county = tolower(trimws(
      str_extract(name, "[^,;]+(?=\\s+County\\s*[,;])")
    )),
    # Last separator-delimited field, e.g. "New York".
    state = trimws(str_extract(name, "[^,;]+$"))
  ) %>%
  select(county, state, geometry)
# Census-tract geometries for the New Jersey counties listed in county_nj.
# The ACS estimate itself is discarded by the select(); only the tract name
# and geometry are kept, and the name is parsed into `county` (lower case)
# and `state`.
#
# Tract names look like "Census Tract 1, Bergen County, New Jersey" (newer
# ACS releases use ";" as the separator). The county and state are pulled out
# by pattern rather than by fixed character offsets, so the parsing does not
# depend on the length of the state name.
shape_nj =
  get_acs(
    geography = "tract",
    variables = "B19013_001",
    state = "NJ",
    county = county_nj,
    geometry = TRUE
  ) %>%
  janitor::clean_names() %>%
  select(name, geometry) %>%
  mutate(
    # Text immediately before " County", e.g. "Bergen".
    county = tolower(trimws(
      str_extract(name, "[^,;]+(?=\\s+County\\s*[,;])")
    )),
    # Last separator-delimited field, e.g. "New Jersey".
    state = trimws(str_extract(name, "[^,;]+$"))
  ) %>%
  select(county, state, geometry)
# Stack the New York and New Jersey tract geometries into one object.
shape_full =
  shape_ny %>%
  rbind(shape_nj)
# admitted =
# covid_df %>%
# group_by(county, admitted) %>%
# summarize(count = n())
# ny_map =
# left_join(shape_ny, bmi_mean, by = "county")
# Mean BMI per county; missing BMI values are ignored.
bmi_mean =
  summarise(
    group_by(covid_df, county),
    bmi_mean = mean(bmi_value, na.rm = TRUE)
  )
# Mean SES per county; missing SES values are ignored.
ses_mean =
  summarise(
    group_by(covid_df, county),
    ses_mean = mean(ses, na.rm = TRUE)
  )
# Attach the county-level means to every tract of the matching county.
# Tracts in counties with no records keep NA for the mean.
full_map_bmi =
  shape_full %>%
  left_join(bmi_mean, by = "county")

full_map_ses =
  shape_full %>%
  left_join(ses_mean, by = "county")
# Interactive map: tracts filled by their county's average BMI and by their
# county's average SES, each binned into quantiles.
# NOTE(review): both fills are added as layers of a single tmap object --
# confirm that tm_facets(sync = TRUE) yields the intended layout.
tmap_mode("view")

tm_shape(full_map_bmi) +
  tm_fill(
    col = "bmi_mean",
    title = "Average BMI",
    palette = "viridis",
    style = "quantile",
    contrast = c(0.3, 1),
    textNA = "Not Available",
    id = "state",
    popup.vars = c(
      "County: " = "county",
      "Average BMI: " = "bmi_mean"
    )
  ) +
  tm_borders(col = "white") +
  tm_shape(full_map_ses) +
  tm_fill(
    col = "ses_mean",
    title = "Average SES",
    palette = "RdYlBu",
    style = "quantile",
    contrast = c(0.3, 1),
    textNA = "Not Available",
    id = "state",
    popup.vars = c(
      "County: " = "county",
      "Average SES: " = "ses_mean"
    )
  ) +
  tm_borders(col = "white") +
  tm_view(
    alpha = 0.85,
    view.legend.position = c("right", "bottom")
  ) +
  tm_scale_bar(text.size = 1) +
  tm_facets(nrow = 1, sync = TRUE)
# Number of events per month, split by admission status
# Monthly event counts, one line per admission status. The `text` aesthetic
# is set on the points only and feeds the plotly tooltip.
plot_1 =
  covid_df %>%
  group_by(eventdatetime, admitted) %>%
  summarise(count = n()) %>%
  ggplot(aes(x = eventdatetime, y = count, color = admitted)) +
  geom_point(
    aes(
      text = paste(
        "Date: ", eventdatetime,
        "\nNumber of Counts: ", count,
        "\nAdmitted: ", admitted
      )
    )
  ) +
  geom_line() +
  labs(
    title = "",
    x = "Event Date",
    y = "Number of Events",
    color = "Admitted"
  )

plot_1 %>%
  ggplotly(tooltip = "text")
# Race and admitted
# Record counts by race/ethnicity and admission status, drawn as dodged
# horizontal bars ordered by count. The "american indian" and "multiple"
# categories are excluded; because the comparison is `!=`, rows with a
# missing ethnicity_race are dropped as well.
plot_2 =
  covid_df %>%
  group_by(ethnicity_race, admitted) %>%
  summarize(count = n()) %>%
  filter(
    ethnicity_race != "american indian",
    ethnicity_race != "multiple"
  ) %>%
  ggplot(aes(
    x = fct_reorder(ethnicity_race, count),
    y = count,
    fill = admitted,
    text = paste(
      "Ethnicity: ", ethnicity_race,
      "\nNumber of Counts: ", count,
      "\nAdmitted: ", admitted
    )
  )) +
  geom_bar(stat = "identity", position = position_dodge()) +
  coord_flip() +
  # labs() names aesthetics, not screen positions: coord_flip() only changes
  # where x and y are drawn, so x is still the ethnicity and y the count.
  labs(
    title = "",
    x = "Ethnicity",
    y = "Number of Counts",
    fill = "Admitted"
  )

ggplotly(plot_2, tooltip = "text")
# BMI against age, coloured by race/ethnicity, with a smoothed trend per
# group. The "multiple" category is excluded; the `!=` filter also drops rows
# with a missing ethnicity_race. The `text` aesthetic is set on the points
# only and feeds the plotly tooltip.
plot_3 =
  covid_df %>%
  filter(ethnicity_race != "multiple") %>%
  ggplot(aes(x = age, y = bmi_value, color = ethnicity_race)) +
  geom_point(
    aes(
      text = paste(
        "Age: ", age,
        "\nBMI: ", bmi_value,
        "\nEthnicity: ", ethnicity_race
      )
    )
  ) +
  geom_smooth(se = FALSE) +
  labs(
    title = "",
    x = "Age",
    y = "BMI Value",
    color = "Ethnicity"
  )

plot_3 %>%
  ggplotly(tooltip = "text")
# Record counts by age (rounded to whole numbers and treated as a category)
# and admission status, drawn as dodged bars.
plot_4 =
  covid_df %>%
  mutate(age = as.factor(round(age))) %>%
  group_by(age, admitted) %>%
  summarise(count = n()) %>%
  ggplot(aes(
    x = age,
    y = count,
    fill = admitted,
    text = paste(
      "Age: ", age,
      "\nNumber of Counts: ", count,
      "\nAdmitted: ", admitted
    )
  )) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "",
    x = "Age",
    y = "Number of Counts",
    fill = "Admitted"
  )

plot_4 %>%
  ggplotly(tooltip = "text")
# BMI against age, coloured by admission status, with a smoothed trend per
# status. The `text` aesthetic is set on the points only and feeds the plotly
# tooltip.
plot_5 =
  covid_df %>%
  ggplot(aes(x = age, y = bmi_value, color = admitted)) +
  geom_point(
    aes(
      text = paste(
        "Age: ", age,
        "\nBMI Value: ", bmi_value,
        "\nAdmitted: ", admitted
      )
    )
  ) +
  geom_smooth(se = FALSE) +
  labs(
    title = "",
    x = "Age",
    y = "BMI Value",
    color = "Admitted"
  )

plot_5 %>%
  ggplotly(tooltip = "text")
# Bar plot of average age by city, ordered by average age and shaded by record count
# Average age per city, bars ordered by average age and filled by the number
# of records. Rows with a missing city are removed after summarising.
plot_6 =
  covid_df %>%
  group_by(city, county) %>%
  summarise(
    mean_age = mean(age, na.rm = TRUE),
    count = n()
  ) %>%
  filter(!is.na(city)) %>%
  ggplot(aes(
    x = fct_reorder(city, mean_age),
    y = mean_age,
    fill = count,
    text = paste(
      "City: ", city,
      "\nAvg Age: ", mean_age,
      "\nCounts: ", count
    )
  )) +
  geom_bar(stat = "identity") +
  # Rotate the city names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "",
    x = "City",
    y = "Average Age",
    fill = "Count"
  )

plot_6 %>%
  ggplotly(tooltip = "text")
# plot_ly(x = ~fct_reorder(city, mean_age), y = ~mean_age,
# type = "bar",
# color = ~county,
# colors = "viridis",
# alpha = .5) %>%
# layout(title = "",
# xaxis = list(title = ""),
# yaxis = list(title = ""),
# barmode = "stack")